Detailed Survival analyis of the Survival lung data.
Libraries
library(survival)
library(FRESA.CAD)
## Loading required package: Rcpp
## Loading required package: stringr
## Loading required package: miscTools
## Loading required package: Hmisc
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
##
## format.pval, units
## Loading required package: pROC
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
op <- par(no.readonly = TRUE)
pander::panderOptions('digits', 3)
pander::panderOptions('keep.trailing.zeros',TRUE)
Libraries
data(lung)
## Warning in data(lung): data set 'lung' not found
lung$inst <- NULL
lung$status <- lung$status - 1
lung <- lung[complete.cases(lung),]
pander::pander(table(lung$status))
pander::pander(summary(lung$time))
Exploring Raw Features with RRPlot
convar <- colnames(lung)[lapply(apply(lung,2,unique),length) > 10]
convar <- convar[convar != "time"]
topvar <- univariate_BinEnsemble(lung[,c("status",convar)],"status")
pander::pander(topvar)
topv <- min(5,length(topvar))
topFive <- names(topvar)[1:topv]
RRanalysis <- list();
idx <- 1
for (topf in topFive)
{
RRanalysis[[idx]] <- RRPlot(cbind(lung$status,lung[,topf]),
atRate=c(0.90),
timetoEvent=lung$time,
title=topf,
# plotRR=FALSE
)
idx <- idx + 1
}






names(RRanalysis) <- topFive
Reporting the Metrics
ROCAUC <- NULL
CstatCI <- NULL
LogRangp <- NULL
Sensitivity <- NULL
Specificity <- NULL
for (topf in topFive)
{
CstatCI <- rbind(CstatCI,RRanalysis[[topf]]$c.index$cstatCI)
LogRangp <- rbind(LogRangp,RRanalysis[[topf]]$surdif$pvalue)
Sensitivity <- rbind(Sensitivity,RRanalysis[[topf]]$ROCAnalysis$sensitivity)
Specificity <- rbind(Specificity,RRanalysis[[topf]]$ROCAnalysis$specificity)
ROCAUC <- rbind(ROCAUC,RRanalysis[[topf]]$ROCAnalysis$aucs)
}
rownames(CstatCI) <- topFive
rownames(LogRangp) <- topFive
rownames(Sensitivity) <- topFive
rownames(Specificity) <- topFive
rownames(ROCAUC) <- topFive
pander::pander(ROCAUC)
| age |
0.590 |
0.493 |
0.688 |
| wt.loss |
0.553 |
0.455 |
0.651 |
pander::pander(CstatCI)
| age |
0.558 |
0.559 |
0.505 |
0.612 |
| wt.loss |
0.511 |
0.511 |
0.453 |
0.572 |
pander::pander(LogRangp)
pander::pander(Sensitivity)
| age |
0.1157 |
0.0647 |
0.187 |
| wt.loss |
0.0496 |
0.0184 |
0.105 |
pander::pander(Specificity)
| age |
0.872 |
0.743 |
0.952 |
| wt.loss |
0.894 |
0.769 |
0.965 |
meanMatrix <- cbind(ROCAUC[,1],CstatCI[,1],Sensitivity[,1],Specificity[,1])
colnames(meanMatrix) <- c("ROCAUC","C-Stat","Sen","Spe")
pander::pander(meanMatrix)
| age |
0.590 |
0.558 |
0.1157 |
0.872 |
| wt.loss |
0.553 |
0.511 |
0.0496 |
0.894 |
Modeling
ml <- BSWiMS.model(Surv(time,status)~1,data=lung,NumberofRepeats = 10)
[+++++++++++++++++++++++++++++]..
sm <- summary(ml)
pander::pander(sm$coefficients)
Table continues below
| ph.ecog |
4.32e-01 |
1.194 |
1.541 |
1.988 |
0.679 |
0.649 |
| sex |
-4.59e-01 |
0.456 |
0.632 |
0.876 |
0.649 |
0.679 |
| pat.karno |
-1.77e-03 |
0.997 |
0.998 |
1.000 |
0.506 |
0.720 |
| ph.karno |
-4.64e-07 |
1.000 |
1.000 |
1.000 |
0.577 |
0.720 |
| age |
4.57e-08 |
1.000 |
1.000 |
1.000 |
0.565 |
0.720 |
Table continues below
| ph.ecog |
0.601 |
0.601 |
0.620 |
0.600 |
0.0449 |
0.405 |
| sex |
0.601 |
0.620 |
0.601 |
0.600 |
0.0285 |
0.478 |
| pat.karno |
0.506 |
0.585 |
0.500 |
0.585 |
0.0292 |
0.342 |
| ph.karno |
0.577 |
0.570 |
0.500 |
0.570 |
0.0143 |
0.280 |
| age |
0.565 |
0.549 |
0.500 |
0.549 |
0.0162 |
0.195 |
| ph.ecog |
3.33 |
2.48 |
-0.02005 |
1.0 |
| sex |
2.76 |
2.85 |
-0.00167 |
1.0 |
| pat.karno |
2.44 |
2.24 |
0.08546 |
1.0 |
| ph.karno |
2.22 |
1.64 |
0.06998 |
0.8 |
| age |
1.97 |
1.14 |
0.04871 |
0.1 |
Expected time to event
toinclude <- rdata[,1]==1
obstiemToEvent <- lung$time
tmin<-min(obstiemToEvent)
sum(toinclude)
[1] 121
timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent,timetoEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
| timetoEvent[toinclude] |
0.808 |
0.0509 |
15.9 |
2.92e-31 |
Fitting linear model: obstiemToEvent[toinclude] ~ 0 +
timetoEvent[toinclude]
| 121 |
200 |
0.677 |
0.675 |
plot(timetoEvent,obstiemToEvent,
col=1+rdata[,1],
xlab="Expected time",
ylab="Observed time",
main="Expected vs. Observed",
xlim=c(tmin,tmax),
ylim=c(tmin,tmax),
log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
pch=c(1,1,-1),
col=c(1,2,"blue"),
lty=c(-1,-1,1)
)

MADerror2 <- mean(abs(timetoEvent[toinclude]-obstiemToEvent[toinclude]))
pander::pander(MADerror2)
163
Cox Calibration
op <- par(no.readonly = TRUE)
calprob <- CoxRiskCalibration(ml,lung,"status","time")
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
DeltaTime=calprob$timeInterval),
caption="Cox Calibration Parameters")
Expected time to event
timetoEvent <- meanTimeToEvent(rdata[,2],timeinterval)
tmax<-max(c(obstiemToEvent,timetoEvent))
lmfit <- lm(obstiemToEvent[toinclude]~0+timetoEvent[toinclude])
sm <- summary(lmfit)
pander::pander(sm)
| timetoEvent[toinclude] |
0.99 |
0.0623 |
15.9 |
2.92e-31 |
Fitting linear model: obstiemToEvent[toinclude] ~ 0 +
timetoEvent[toinclude]
| 121 |
200 |
0.677 |
0.675 |
plot(timetoEvent,obstiemToEvent,
col=1+rdata[,1],
xlab="Expected time",
ylab="Observed time",
main="Expected vs. Observed",
xlim=c(tmin,tmax),
ylim=c(tmin,tmax),
log="xy")
lines(x=c(tmin,tmax),y=lmfit$coefficients*c(tmin,tmax),lty=1,col="blue")
txt <- bquote(paste(R^2 == .(round(sm$r.squared,3))))
text(tmin+0.005*(tmax-tmin),tmax,txt,cex=0.7)
text(tmin+0.015*(tmax-tmin),tmax,sprintf("Slope=%4.3f",sm$coefficients[1]),cex=0.7)
legend("bottomright",legend=c("No Event","Event","Linear fit"),
pch=c(1,1,-1),
col=c(1,2,"blue"),
lty=c(-1,-1,1)
)

MADerror2 <-c(MADerror2,mean(abs(timetoEvent-obstiemToEvent)))
pander::pander(MADerror2)
163 and 165
Cross-Validation
rcv <- randomCV(theData=lung,
theOutcome = Surv(time,status)~1,
fittingFunction=BSWiMS.model,
trainFraction = 0.95,
repetitions=200,
classSamplingType = "Pro"
)
.[+++].[++].[+++].[++].[+++].[+++].[++].[+++].[+++].[+]10 Tested: 77
Avg. Selected: 3.5 Min Tests: 1 Max Tests: 3 Mean Tests: 1.298701 . MAD:
0.4783289
.[++].[+++].[++].[+++].[+++].[+++].[+++].[++++].[++].[+++]20 Tested:
114 Avg. Selected: 3.65 Min Tests: 1 Max Tests: 6 Mean Tests: 1.754386 .
MAD: 0.4773353
.[+-].[++++].[++].[+++].[++].[++].[++].[++].[+++].[++++]30 Tested:
138 Avg. Selected: 3.6 Min Tests: 1 Max Tests: 7 Mean Tests: 2.173913 .
MAD: 0.4785659
.[+++].[+++].[++-].[+++].[++].[++].[+++].[++].[+++].[++]40 Tested:
150 Avg. Selected: 3.575 Min Tests: 1 Max Tests: 8 Mean Tests: 2.666667
. MAD: 0.4800767
.[+++].[++].[+++-].[+++].[++++].[+++].[+++].[++++].[+++].[+++]50
Tested: 157 Avg. Selected: 3.66 Min Tests: 1 Max Tests: 9 Mean Tests:
3.184713 . MAD: 0.4793113
.[+-].[+++].[+++].[++++].[+++].[+++].[++++].[+++].[+++].[+++]60
Tested: 158 Avg. Selected: 3.716667 Min Tests: 1 Max Tests: 10 Mean
Tests: 3.797468 . MAD: 0.4788951
.[++].[+++].[+++].[++++].[++].[++].[+++].[+++].[+++].[++]70 Tested:
160 Avg. Selected: 3.714286 Min Tests: 1 Max Tests: 13 Mean Tests: 4.375
. MAD: 0.4792667
.[+++].[+++].[++-].[++].[+++].[+++].[+++].[+++].[+].[++]80 Tested:
166 Avg. Selected: 3.6875 Min Tests: 1 Max Tests: 13 Mean Tests:
4.819277 . MAD: 0.475646
.[+++].[++].[++].[+].[+++].[+++].[+++].[+++].[+++].[+++]90 Tested:
168 Avg. Selected: 3.677778 Min Tests: 1 Max Tests: 14 Mean Tests:
5.357143 . MAD: 0.475111
.[+++].[++].[+++].[++++].[+++].[++++].[+++].[+++].[+++].[++++]100
Tested: 168 Avg. Selected: 3.71 Min Tests: 1 Max Tests: 16 Mean Tests:
5.952381 . MAD: 0.4752537
.[++++].[++++].[+++].[+++].[+++].[+].[++].[++].[+].[+++]110 Tested:
168 Avg. Selected: 3.7 Min Tests: 1 Max Tests: 16 Mean Tests: 6.547619 .
MAD: 0.4750302
.[+++].[+++].[+++].[+++].[++].[+++].[+].[+++].[++].[++]120 Tested:
168 Avg. Selected: 3.683333 Min Tests: 1 Max Tests: 16 Mean Tests:
7.142857 . MAD: 0.4748065
.[+++].[++].[+++].[+++].[+++].[+++].[+++].[+++].[++].[++]130 Tested:
168 Avg. Selected: 3.684615 Min Tests: 2 Max Tests: 17 Mean Tests:
7.738095 . MAD: 0.4749498
.[+++].[+++].[+++].[+].[++].[+++].[+++].[+++].[+++].[+++]140 Tested:
168 Avg. Selected: 3.685714 Min Tests: 2 Max Tests: 17 Mean Tests:
8.333333 . MAD: 0.4746038
.[+++].[+++].[+++].[+++].[+-].[+++].[+++].[++].[+++].[+++]150 Tested:
168 Avg. Selected: 3.686667 Min Tests: 2 Max Tests: 20 Mean Tests:
8.928571 . MAD: 0.4746182
.[++].[+++].[++].[+].[+++].[++].[++].[++].[+].[+++]160 Tested: 168
Avg. Selected: 3.65 Min Tests: 2 Max Tests: 22 Mean Tests: 9.52381 .
MAD: 0.4744816
.[++].[+++].[+++].[+++].[+++].[+++].[+++].[+++].[+++].[++]170 Tested:
168 Avg. Selected: 3.658824 Min Tests: 3 Max Tests: 23 Mean Tests:
10.11905 . MAD: 0.4743399
.[+++].[++].[+++].[+++].[+].[++].[++].[+++].[+++].[++]180 Tested: 168
Avg. Selected: 3.644444 Min Tests: 3 Max Tests: 23 Mean Tests: 10.71429
. MAD: 0.4743689
.[++].[+++].[++].[++].[++].[+++].[+++].[+++].[+++].[+++]190 Tested:
168 Avg. Selected: 3.642105 Min Tests: 3 Max Tests: 23 Mean Tests:
11.30952 . MAD: 0.4744639
.[+++].[+++].[+++].[+++].[++].[++].[+++].[+++].[++].[+++]200 Tested:
168 Avg. Selected: 3.645 Min Tests: 3 Max Tests: 26 Mean Tests: 11.90476
. MAD: 0.4742153
stp <- rcv$survTestPredictions
stp <- stp[!is.na(stp[,4]),]
bbx <- boxplot(unlist(stp[,1])~rownames(stp),plot=FALSE)
times <- bbx$stats[3,]
status <- boxplot(unlist(stp[,2])~rownames(stp),plot=FALSE)$stats[3,]
prob <- ppoisGzero(boxplot(unlist(stp[,4])~rownames(stp),plot=FALSE)$stats[3,],h0)
rdatacv <- cbind(status,prob)
rownames(rdatacv) <- bbx$names
names(times) <- bbx$names
rrAnalysisTest <- RRPlot(rdatacv,atRate=c(0.90),
timetoEvent=times,
title="Test: Lung Cancer",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)






Calibrating the test results
rdatacv <- cbind(status,prob,times)
calprob <- CalibrationProbPoissonRisk(rdatacv)
pander::pander(c(h0=calprob$h0,
Gain=calprob$hazardGain,
DeltaTime=calprob$timeInterval),
caption="Cox Calibration Parameters")
timeinterval <- calprob$timeInterval;
rdata <- cbind(status,calprob$prob)
rrAnalysisTest <- RRPlot(rdata,atRate=c(0.90),
timetoEvent=times,
title="Calibrated Test: Lung",
ysurvlim=c(0.00,1.0),
riskTimeInterval=timeinterval)





